*******************************************
* Creates state-industry-year dataset with rainfall and labor regulations.
*Source: Our own ASI data
*******************************************
* Session setup: empty the data in memory, turn off output paging, and raise
* the memory and matrix-size limits.
* NOTE(review): set mem is presumably only needed on older Stata releases - confirm target version.
clear
set more off 
set mem 700m
set matsize 4000


* Folder locations used by the script. These are absolute, machine-specific paths.
* NOTE(review): besleyburgess and aghion are not referenced in the part of the script visible here.
global do		"C:\Users\Siddharth\Desktop\factories\do"
global rain		"C:\Users\Siddharth\Desktop\factories\data\rainfall"
global data		"C:\Users\Siddharth\Desktop\factories\data"
global besleyburgess  "C:\Users\Siddharth\Desktop\factories\burgess data\Socioeconomic"
global aghion  "C:\Users\Siddharth\Desktop\factories\burgess data\aghion burgess data\data"
 
* Variable lists. varlist is what is kept from each cleaned ASI round;
* varlist2 is the same list without nic87code and is the list that gets
* multiplied and summed within state-industry cells.
global varlist  "factories closingvalueland fixedcapitalop fixedcapitalcl landrental totalrent workers workers_2 mandays workerswages totaloutput depreciation electricity fuel materials valueadded profits nic87code"
global varlist2  "factories closingvalueland fixedcapitalop fixedcapitalcl landrental totalrent workers workers_2 mandays workerswages totaloutput depreciation electricity fuel materials valueadded profits"

* Crosswalk between the two state codes carried by the district concordance
* file. Each state code is the matching asicode with its last two digits cut off.
tempfile scodes
use "$data\asi_50_55_43_code91.dta", clear
foreach suffix in 88 99 {
	generate statecode`suffix' = int(asicode`suffix'/100)
}
keep statecode88 statecode99
duplicates drop
* rows without a statecode88 cannot be mapped, so they are removed
drop if statecode88 == .
* sorted on statecode99 because the later match-merge uses it as the key
sort statecode99
save `scodes'


* Using Delaware rainfall data (district_rain_wide_code91) to get rainfall by state-yr.
* Districts are matched on code91, every shock measure is averaged over the
* districts of a state, and the result is reshaped to one row per state-year.

use "$data\asi_50_55_43_code91.dta", clear
sort code91
merge code91 using "$rain\district_rain_wide_code91.dta"
* keep only districts present in both files
keep if _m == 3
drop _m  bsr96_district bsr00_district asi_district_name asi_state_ut asicode99 distt_id50 distt_id43 distt_id55 code91  longitude latitude glon glat
ge statecode88 =  int(asicode88/100)
collapse (mean) shock30* shock50* shockdev* shockperc* shockctsdev* shockpctile* shocknorm* shockhigh* shocklow*, by(statecode88)
reshape long shock30 shock50 shockdev shockperc shockctsdev  shockpctile shocknorm shockhigh shocklow, i(statecode88) j(year)
* The state mean of shockpctile can be non-integer, so it is recoded to its
* sign (-1, 0 or 1). Stata orders missing values above every number, so the
* positive branch has to exclude them explicitly; otherwise a missing shock
* would be recoded to +1.
replace shockpctile = -1 if shockpctile < 0
replace shockpctile = 1 if shockpctile > 0 & shockpctile < .
drop if statecode88 == .
* sorted on the merge key used when this file is matched to the factory data
sort statecode88 year
tempfile rain_statecode88
save `rain_statecode88'


* ASI 1987-88: state-by-industry totals
* process88_sid.do is run first; the code below works on whatever data it leaves in memory
do "$do\process88_sid.do"
keep asicode88 price88 multiplier $varlist

* missing price index: filled with the mean of the non-missing values
egen meanprice = mean(price88)
replace price88 = meanprice if price88 == .
drop meanprice

* step 1 - deflate the listed variables by price88
foreach v in closingvalueland fixedcapitalop fixedcapitalcl landrental totalrent workerswages totaloutput depreciation electricity fuel materials valueadded profits {
	replace `v' = `v'/price88
}

* step 2 - multiply every variable of the varlist2 global by multiplier
foreach v in $varlist2 {
	replace `v' = `v'* multiplier
}

* sum within state-industry cells and tag the year
generate statecode88 = int(asicode88/100)
collapse (sum) $varlist2, by(statecode88 nic87code)
generate year = 1987
tempfile sdist88
save `sdist88'

* ASI 1990-91: state-by-industry totals
* process91_sid.do is run first; the code below works on whatever data it leaves in memory
do "$do\process91_sid.do"
keep asicode88 price91  multiplier $varlist

* missing price index: filled with the mean of the non-missing values
egen meanprice = mean(price91)
replace price91 = meanprice if price91 == .
drop meanprice

* step 1 - deflate the listed variables by price91
foreach v in closingvalueland fixedcapitalop fixedcapitalcl landrental totalrent workerswages totaloutput depreciation electricity fuel materials valueadded profits {
	replace `v' = `v'/price91
}

* step 2 - multiply every variable of the varlist2 global by multiplier
foreach v in $varlist2 {
	replace `v' = `v'* multiplier
}

* sum within state-industry cells and tag the year
generate statecode88 = int(asicode88/100)
collapse (sum) $varlist2, by(statecode88 nic87code)
generate year = 1990
tempfile sdist91
save `sdist91'

* ASI 1993-94: state-by-industry totals
* This round carries asicode99, so its state code is mapped to statecode88
* through the scodes crosswalk before collapsing.
do "$do\process94 - ach.do"
keep asicode99 price94 multiplier $varlist

* missing price index: filled with the mean of the non-missing values
egen meanprice = mean(price94)
replace price94 = meanprice if price94 == .
drop meanprice

* step 1 - deflate the listed variables by price94
foreach v in closingvalueland fixedcapitalop fixedcapitalcl landrental totalrent workerswages totaloutput depreciation electricity fuel materials valueadded profits {
	replace `v' = `v'/price94
}

* step 2 - multiply every variable of the varlist2 global by multiplier
foreach v in $varlist2 {
	replace `v' = `v'* multiplier
}

* attach statecode88 and keep only rows matched in the crosswalk
generate statecode99 = int(asicode99/100)
sort statecode99
merge statecode99 using `scodes'
drop if _merge != 3
drop _merge

* sum within state-industry cells and tag the year
collapse (sum) $varlist2, by(statecode88 nic87code)
generate year = 1993
tempfile sdist94
save `sdist94'





* Labor regulation data, sorted on statecode88 and year so that it can be the
* using file of the match-merge further down.
* The path is quoted, like every other file reference in this script, so the
* command still parses if the data global ever points to a folder whose name
* contains spaces.
use "$data\labregrainfall.dta", clear
* disabled crosswalk step, kept for reference
*rename statecode statecode99
*sort statecode99
*merge statecode99 using `scodes'
*keep if _m == 3
*drop _m
sort statecode88 year
tempfile lab_temp
save `lab_temp'




* Stack the three ASI rounds, then attach rainfall and labor regulation data

use `sdist88', clear
foreach round in 91 94 {
	append using `sdist`round''
}

* rainfall: only state-years found in both datasets are kept
sort statecode88  year
merge statecode88 year using `rain_statecode88'

tabulate _merge
drop if _merge != 3
drop _merge


* labor regulation: only the listed variables are taken from the using file.
* Unmatched rows are not removed here and _merge stays in the data.
local labvars "statecode88 year nstrict  prow proe APcode APDS Bhatt EPLprow DSproe DSprow Bhattproe Bhattprow"
sort statecode88 year
merge statecode88 year using `lab_temp', keep(`labvars')

* proe is a pro-employer state, prow is a pro-worker state
*ge proe = nstrict < 0
*ge prow = nstrict > 0
*replace prow = . if nstrict == .


* rainshock*nic3 interactions

ge nic3 = nic87code

* one indicator per distinct nic3 value; tabulate leaves the number of
* categories in r(r), which is stored before any other command can overwrite it
tab nic3, ge(nic3dummy)
local ncat = r(r)

* Interact both rainfall shocks with every industry indicator. The loop bound
* comes from the data instead of a fixed 112, so the loop neither stops with an
* error when there are fewer industries nor skips indicators when there are more.
forvalues ic = 1/`ncat' {
	ge shocknorm_nic3dum`ic' = shocknorm*nic3dummy`ic'
	ge shockpctile_nic3dum`ic' = shockpctile*nic3dummy`ic'
}


* Interactions of each rainfall shock with the labor regulation and other
* state characteristics. New variables are named statevar_shock.
foreach shock in shocknorm shockpctile {
	foreach statevar in nstrict prow proe APcode APDS Bhatt EPLprow DSproe DSprow Bhattproe Bhattprow {
		generate `statevar'_`shock' = `statevar'*`shock'
	}
}



* Restrict to the three survey years and to rows with a non-missing nic3 and
* nstrict, build the state-industry and state-year identifiers, and save.
keep if inlist(year, 1987, 1990, 1993)
drop if nic3 == . | nstrict == .
generate statenic3 = statecode88*1000 + nic3
generate stateyr = statecode88*10000 + year

saveold "$data\sid_state_industry_panel.dta", replace
